package com.etc

import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}



/**
  * Linear regression demo.
  *
  * Reads `house.csv` (semicolon-separated, with header), shuffles the rows,
  * assembles the `square` column into a feature vector, trains a
  * LinearRegression model on 80% of the data and prints predictions for the
  * remaining 20%.
  */
object linner {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("linner")
    val spark = SparkSession.builder().config(conf).getOrCreate()

    // Ensure the SparkSession is released even if loading/training fails.
    try {
      // house.csv uses ';' as the field separator and has a header row;
      // without schema inference every column is read as a string.
      val file = spark.read.format("csv")
        .option("sep", ";")
        .option("header", "true")
        .load("house.csv")

      import spark.implicits._

      // Random source used to shuffle rows before the train/test split.
      val random = new util.Random()

      // Parse both columns to Double, attach a random key and sort by it
      // to shuffle the data set.
      val data = file.select("square", "price")
        .map(row => (row.getString(0).toDouble, row.getString(1).toDouble, random.nextDouble()))
        .toDF("square", "price", "random")
        .sort("random")

      // Pack the input column(s) into a single "features" vector column,
      // as required by Spark ML estimators.
      val assembler = new VectorAssembler()
        .setInputCols(Array("square"))
        .setOutputCol("features")

      val frame = assembler.transform(data)

      // 80/20 train/test split with a fixed seed for reproducibility.
      val Array(train, test) = frame.randomSplit(Array(0.8, 0.2), 1L)

      // Linear regression estimator.
      val regression = new LinearRegression()
        .setMaxIter(10)           // maximum training iterations
        .setRegParam(0.3)         // regularization strength
        .setElasticNetParam(0.8)  // L1/L2 mixing parameter

      // fit = train on the training set; transform = predict.
      // "features" is the feature vector column, "price" the label.
      val model = regression.setLabelCol("price").setFeaturesCol("features").fit(train)

      model.transform(test).show()
    } finally {
      spark.stop()
    }
  }
}
